import requests, re, csv

# 拿到页面源代码
# 通过正则来提取想要的内容

# Address of the Douban movie chart page that the rest of the script parses.
url = 'https://movie.douban.com/chart'
# NOTE(review): `param` is never passed to the request below, and its contents
# look like account/session data -- confirm whether it should be sent with the
# request or removed from the source file.
param = {
    "channel": {"notification": {"user": "148853892"}},
    "auth": {"148853892_1688912243": "8706d6d8b99474b7c86d75a783f0d27ffda2534b"}}
# Browser-like User-Agent sent with the request (presumably because the site
# rejects the default client User-Agent -- verify).
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"}
# Without an explicit timeout, requests waits indefinitely on a stalled
# connection, so bound the wait to 10 seconds.
req = requests.get(url=url, headers=header, timeout=10)
# Fail loudly on 4xx/5xx responses instead of parsing an error page and
# silently producing an empty CSV.
req.raise_for_status()
# Dump the raw page source for inspection.
print(req.text)

# (?P<name>.*?)
# (r"<div class='(?P<name>.*?)><span id='(?P<number>\d+)'>(?P<pp>.*?)</span></div>", re.S)
# zz = re.compile(r'class="">(?P<name1>.*?)/ <span style="font-size:13px;">(?P<name2>.*?)', re.S)


# Pattern for a single chart entry. Each entry starts at its
# <table width="100%" class=""> element and yields four named groups:
#   name   - text of the <span style="font-size:13px;"> element
#   date   - text of the <p class="pl"> element
#   score  - text of the <span class="rating_nums"> element
#   people - text of the following <span class="pl"> element
# re.S lets "." match newlines so one match can span several lines of HTML.
zz = re.compile(r'<table width="100%" class="">.*?<span style="font-size:13px;">(?P<name>.*?)</span>.*?<p '
                r'class="pl">(?P<date>.*?)</p>.*?<span class="rating_nums">(?P<score>.*?)</span>.*?<span class="pl">('
                r'?P<people>.*?)</span>', re.S)
resp = zz.finditer(req.text)
print("=====================================")
# newline="" is required by the csv module so that it controls line endings
# itself; without it, extra blank rows appear on platforms that translate
# "\n" to "\r\n". The with-block closes the file even if a row fails to write.
with open("film.csv", mode="w", encoding="utf-8", newline="") as f:
    datecsv = csv.writer(f)
    for i in resp:
        print(i.group("name"))
        # NOTE(review): only the first 10 characters of the date are printed,
        # while the CSV row below receives the full captured text -- confirm
        # which of the two is intended.
        print(i.group("date")[0:10])
        print(i.group("score"))
        print(i.group("people"), "\n")
        print('------------------------')
        dic = i.groupdict()  # captured groups as a dict keyed by group name
        print(dic)
        datecsv.writerow(dic.values())  # one CSV row per match
print("over!")


